1 /*
2 * Copyright (C) 2006 The Guava Authors
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 package com.google.common.escape;
18
19 import static com.google.common.base.Preconditions.checkNotNull;
20
21 import com.google.common.annotations.Beta;
22 import com.google.common.annotations.GwtCompatible;
23
24 /**
25 * An object that converts literal text into a format safe for inclusion in a particular context
26 * (such as an XML document). Typically (but not always), the inverse process of "unescaping" the
27 * text is performed automatically by the relevant parser.
28 *
 * <p>For example, an XML escaper would convert the literal string {@code "Foo<Bar>"} into {@code
 * "Foo&lt;Bar&gt;"} to prevent {@code "<Bar>"} from being confused with an XML tag. When the
31 * resulting XML document is parsed, the parser API will return this text as the original literal
32 * string {@code "Foo<Bar>"}.
33 *
34 * <p>A {@code CharEscaper} instance is required to be stateless, and safe when used concurrently by
35 * multiple threads.
36 *
37 * <p>Several popular escapers are defined as constants in classes like {@link
38 * com.google.common.html.HtmlEscapers}, {@link com.google.common.xml.XmlEscapers}, and {@link
39 * SourceCodeEscapers}. To create your own escapers extend this class and implement the {@link
40 * #escape(char)} method.
41 *
42 * @author Sven Mawson
43 * @since 15.0
44 */
45 @Beta
46 @GwtCompatible
47 public abstract class CharEscaper extends Escaper {
48 /** Constructor for use by subclasses. */
49 protected CharEscaper() {}
50
51 /**
52 * Returns the escaped form of a given literal string.
53 *
54 * @param string the literal string to be escaped
55 * @return the escaped form of {@code string}
56 * @throws NullPointerException if {@code string} is null
57 */
58 @Override public String escape(String string) {
59 checkNotNull(string); // GWT specific check (do not optimize)
60 // Inlineable fast-path loop which hands off to escapeSlow() only if needed
61 int length = string.length();
62 for (int index = 0; index < length; index++) {
63 if (escape(string.charAt(index)) != null) {
64 return escapeSlow(string, index);
65 }
66 }
67 return string;
68 }
69
70 /**
71 * Returns the escaped form of a given literal string, starting at the given index. This method is
72 * called by the {@link #escape(String)} method when it discovers that escaping is required. It is
73 * protected to allow subclasses to override the fastpath escaping function to inline their
74 * escaping test. See {@link CharEscaperBuilder} for an example usage.
75 *
76 * @param s the literal string to be escaped
77 * @param index the index to start escaping from
78 * @return the escaped form of {@code string}
79 * @throws NullPointerException if {@code string} is null
80 */
81 protected final String escapeSlow(String s, int index) {
82 int slen = s.length();
83
84 // Get a destination buffer and setup some loop variables.
85 char[] dest = Platform.charBufferFromThreadLocal();
86 int destSize = dest.length;
87 int destIndex = 0;
88 int lastEscape = 0;
89
90 // Loop through the rest of the string, replacing when needed into the
91 // destination buffer, which gets grown as needed as well.
92 for (; index < slen; index++) {
93
94 // Get a replacement for the current character.
95 char[] r = escape(s.charAt(index));
96
97 // If no replacement is needed, just continue.
98 if (r == null) continue;
99
100 int rlen = r.length;
101 int charsSkipped = index - lastEscape;
102
103 // This is the size needed to add the replacement, not the full size
104 // needed by the string. We only regrow when we absolutely must, and
105 // when we do grow, grow enough to avoid excessive growing. Grow.
106 int sizeNeeded = destIndex + charsSkipped + rlen;
107 if (destSize < sizeNeeded) {
108 destSize = sizeNeeded + DEST_PAD_MULTIPLIER * (slen - index);
109 dest = growBuffer(dest, destIndex, destSize);
110 }
111
112 // If we have skipped any characters, we need to copy them now.
113 if (charsSkipped > 0) {
114 s.getChars(lastEscape, index, dest, destIndex);
115 destIndex += charsSkipped;
116 }
117
118 // Copy the replacement string into the dest buffer as needed.
119 if (rlen > 0) {
120 System.arraycopy(r, 0, dest, destIndex, rlen);
121 destIndex += rlen;
122 }
123 lastEscape = index + 1;
124 }
125
126 // Copy leftover characters if there are any.
127 int charsLeft = slen - lastEscape;
128 if (charsLeft > 0) {
129 int sizeNeeded = destIndex + charsLeft;
130 if (destSize < sizeNeeded) {
131
132 // Regrow and copy, expensive! No padding as this is the final copy.
133 dest = growBuffer(dest, destIndex, sizeNeeded);
134 }
135 s.getChars(lastEscape, slen, dest, destIndex);
136 destIndex = sizeNeeded;
137 }
138 return new String(dest, 0, destIndex);
139 }
140
141 /**
142 * Returns the escaped form of the given character, or {@code null} if this character does not
143 * need to be escaped. If an empty array is returned, this effectively strips the input character
144 * from the resulting text.
145 *
146 * <p>If the character does not need to be escaped, this method should return {@code null}, rather
147 * than a one-character array containing the character itself. This enables the escaping algorithm
148 * to perform more efficiently.
149 *
150 * <p>An escaper is expected to be able to deal with any {@code char} value, so this method should
151 * not throw any exceptions.
152 *
153 * @param c the character to escape if necessary
154 * @return the replacement characters, or {@code null} if no escaping was needed
155 */
156 protected abstract char[] escape(char c);
157
158 /**
159 * Helper method to grow the character buffer as needed, this only happens once in a while so it's
160 * ok if it's in a method call. If the index passed in is 0 then no copying will be done.
161 */
162 private static char[] growBuffer(char[] dest, int index, int size) {
163 char[] copy = new char[size];
164 if (index > 0) {
165 System.arraycopy(dest, 0, copy, 0, index);
166 }
167 return copy;
168 }
169
170 /**
171 * The multiplier for padding to use when growing the escape buffer.
172 */
173 private static final int DEST_PAD_MULTIPLIER = 2;
174 }